Code
import eia_api as api
import eia_etl as etl
import eia_metadata as eia_meta
import pandas as pd
import numpy as np
import requests
import json
import os
import datetime
import plotly.express as pxThe goal of this doc is to execute an initial data pull of the hourly demand for California balancing authority subregion (CISO). This includes the following four independent system operators:
The data backfill process includes the following steps:
import eia_api as api
import eia_etl as etl
import eia_metadata as eia_meta
import pandas as pd
import numpy as np
import requests
import json
import os
import datetime
import plotly.express as pxraw_json = open("../metadata/series.json")
meta_json = json.load(raw_json)
series = pd.DataFrame(meta_json["series"])
api_path = meta_json["api_path"]facets_template = {
"parent" : None,
"subba" : None
}
start = datetime.datetime(2018, 6, 20, 1)
end = datetime.datetime(2024, 2, 18, 1)
offset = 2250
eia_api_key = os.getenv('EIA_API_KEY')
meta_path = "../metadata/ciso_log_py.csv"
data_path = "../csv/ciso_grid_py.csv"metadata = api.eia_metadata(api_key = eia_api_key, api_path = api_path)
metadata.meta.keys()
print(metadata.meta["startPeriod"])
print(metadata.meta["endPeriod"])2018-06-19T05
2024-02-20T08
start = datetime.datetime(2018, 7, 1, 1)
# start = datetime.datetime(2024, 1, 1, 1)
end = datetime.datetime(2024, 2, 18, 1)
for i in series.index:
facets = facets_template
facets["parent"] = series.at[i, "parent_id"]
facets["subba"] = series.at[i, "subba_id"]
print(facets)
temp = api.eia_backfile(api_key = eia_api_key,
api_path = api_path+ "data",
facets = facets,
start = start,
end = end,
offset = offset)
ts_obj = pd.DataFrame(np.arange(start = start, stop = end + datetime.timedelta(hours = 1), step = datetime.timedelta(hours = 1)).astype(datetime.datetime), columns=["index"])
ts_obj = ts_obj.merge(temp.data, left_on = "index", right_on = "period", how="left")
ts_obj.drop("period", axis = 1, inplace= True)
ts_obj = ts_obj.rename(columns= {"index": "period"})
meta_temp = eia_meta.create_metadata(data = ts_obj, start = start, end = end, type = "backfile")
meta_temp["index"] = 1
meta_df = pd.DataFrame([meta_temp])
if i == series.index.start:
data = ts_obj
meta = meta_df
else:
data = data._append(ts_obj)
meta = meta._append(meta_df){'parent': 'CISO', 'subba': 'PGAE'}
{'parent': 'CISO', 'subba': 'SCE'}
{'parent': 'CISO', 'subba': 'SDGE'}
{'parent': 'CISO', 'subba': 'VEA'}
print(meta)
# The initial pull has some missing values
meta["success"] = True
# Save the data
data.to_csv(data_path)
meta["update"] = True
m = eia_meta.append_metadata(meta_path = "../metadata/ciso_log_py.csv", meta = meta, save = True, init = True)
print(m) index parent subba time start \
0 1 CISO PGAE 2024-02-21 14:25:14.116685+00:00 2018-07-01 01:00:00
0 1 CISO SCE 2024-02-21 14:25:27.859687+00:00 2018-07-01 01:00:00
0 1 CISO SDGE 2024-02-21 14:25:41.422542+00:00 2018-07-01 01:00:00
0 1 CISO VEA 2024-02-21 14:25:54.614824+00:00 2018-07-01 01:00:00
end start_act end_act start_match \
0 2024-02-18 01:00:00 2018-07-01 01:00:00 2024-02-18 01:00:00 True
0 2024-02-18 01:00:00 2018-07-01 01:00:00 2024-02-18 01:00:00 True
0 2024-02-18 01:00:00 2018-07-01 01:00:00 2024-02-18 01:00:00 True
0 2024-02-18 01:00:00 2018-07-01 01:00:00 2024-02-18 01:00:00 True
end_match n_obs na type update success \
0 True 49393 105 backfile False False
0 True 49393 105 backfile False False
0 True 49393 105 backfile False False
0 True 49393 105 backfile False False
comments
0 Missing values were found;
0 Missing values were found;
0 Missing values were found;
0 Missing values were found;
index parent subba time start \
0 1 CISO PGAE 2024-02-21 14:25:14.116685+00:00 2018-07-01 01:00:00
0 1 CISO SCE 2024-02-21 14:25:27.859687+00:00 2018-07-01 01:00:00
0 1 CISO SDGE 2024-02-21 14:25:41.422542+00:00 2018-07-01 01:00:00
0 1 CISO VEA 2024-02-21 14:25:54.614824+00:00 2018-07-01 01:00:00
end start_act end_act start_match \
0 2024-02-18 01:00:00 2018-07-01 01:00:00 2024-02-18 01:00:00 True
0 2024-02-18 01:00:00 2018-07-01 01:00:00 2024-02-18 01:00:00 True
0 2024-02-18 01:00:00 2018-07-01 01:00:00 2024-02-18 01:00:00 True
0 2024-02-18 01:00:00 2018-07-01 01:00:00 2024-02-18 01:00:00 True
end_match n_obs na type update success \
0 True 49393 105 backfile True True
0 True 49393 105 backfile True True
0 True 49393 105 backfile True True
0 True 49393 105 backfile True True
comments
0 Missing values were found;
0 Missing values were found;
0 Missing values were found;
0 Missing values were found;
We will use Plotly to visualize the series:
d = data.sort_values(by = ["subba", "period"])
p = px.line(data, x="period", y="value", color="subba")
p.show()